library(tidyverse)
library(janitor)
library(naniar)
library(ggmap)
library(shiny)
library("tidyverse")
library("naniar")
library("janitor")
library("stringr")
library(tidyverse)
library(janitor)
library(naniar)
library(tidyverse)
library(skimr)
library(janitor)
library(palmerpenguins)
library(gtools)
library(RColorBrewer)
library(paletteer)
library(ggthemes)
options(scipen = 999)
locations <- read.csv("data/Health_Facility_General_Information.csv") %>%
clean_names()
cardiac <- read_csv("data/cardiac-surgery.csv") %>%
clean_names()
## Rows: 1609 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Hospital Name, Detailed Region, Region, Procedure, Year of Hospital...
## dbl (8): Facility ID, Number of Cases, Number of Deaths, Observed Mortality ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
locations <- locations %>%
group_by(facility_id) %>%
summarise(across(everything(), first))
cardiac <- left_join(cardiac, locations, by="facility_id")
Have to separate the data, as some of them are over a range of a couple of years, some of them are year by year
cardiac <- cardiac %>%
filter(str_detect(year_of_hospital_discharge, "-", negate = TRUE))
Map Data:
register_stadiamaps("e2de824a-0995-4a51-915c-33c14c061e7b", write = FALSE)
cardiac %>% #GET MAX LAT AND LON FROM CODE BELOW!!! EASY MAKES SENSE
select(facility_longitude, facility_latitude) %>%
summary()
## facility_longitude facility_latitude
## Min. :-78.87 Min. :40.58
## 1st Qu.:-74.92 1st Qu.:40.74
## Median :-73.95 Median :40.88
## Mean :-74.63 Mean :41.61
## 3rd Qu.:-73.80 3rd Qu.:42.73
## Max. :-72.98 Max. :44.70
## NA's :53 NA's :53
lat <- c(40.58, 44.70) #we got tehse values from summary above
long <- c(-78.87, -72.98)
bbox <- make_bbox(long, lat, f = 0.03)
maptotal <- get_stadiamap(bbox, maptype = "stamen_terrain", zoom=7) #Make ur basemap, there are other styles which you could find from this link, https://docs.stadiamaps.com/themes/
## ℹ © Stadia Maps © Stamen Design © OpenMapTiles © OpenStreetMap contributors.
x <- cardiac %>%
arrange(hospital_name) %>%
group_by(facility_id, hospital_name, facility_latitude, facility_longitude) %>%
summarise(number_of_cases=sum(number_of_cases), average_moratality_rate=mean(observed_mortality_rate), .groups = 'keep') %>%
mutate(total_deaths=average_moratality_rate/100*number_of_cases) %>%
mutate(facility_latitude=as.numeric(facility_latitude), facility_longitude=as.numeric(facility_longitude))
lat1 <- c(40.58, 41.2) #we got tehse values from summary above
long1 <- c(-74.5, -72.98)
bbox1 <- make_bbox(long1, lat1, f = 0.03)
mapnyc <- get_stadiamap(bbox1, maptype = "stamen_terrain", zoom=8)
## ℹ © Stadia Maps © Stamen Design © OpenMapTiles © OpenStreetMap contributors.
cardiac %>%
mutate(across(everything(), tolower)) %>%
mutate(across(everything(), ~ str_replace_all((.), " ", "_"))) %>%
mutate(across(everything(), ~ str_replace_all((.), "__", "_"))) %>%
mutate(across(-"year_of_hospital_discharge", ~ str_remove((.), "-"))) %>%
mutate(across(number_of_cases:risk_adjusted_mortality_rate, as.numeric))
## # A tibble: 968 × 49
## facility_id hospital_name detailed_region region procedure
## <chr> <chr> <chr> <chr> <chr>
## 1 1 albany_med._ctr capital_district capit… all_pci
## 2 116 arnot_ogden_med_ctr western_ny__rochest… weste… all_pci
## 3 746 bassett_medical_center capital_district capit… all_pci
## 4 1438 bellevue_hospital_ctr manhattan ny_me… all_pci
## 5 1178 bronxlebanon-concourse bronx ny_me… all_pci
## 6 1286 brookdale_univ_hosp_med_ctr kings ny_me… all_pci
## 7 885 brookhaven_memorial ny_metro__long_isla… ny_me… all_pci
## 8 207 buffalo_general_med_ctr western_ny__buffalo weste… all_pci
## 9 977 cayuga_med_ctr_ithaca central_ny centr… all_pci
## 10 636 crouse_hospital central_ny centr… all_pci
## # ℹ 958 more rows
## # ℹ 44 more variables: year_of_hospital_discharge <chr>, number_of_cases <dbl>,
## # number_of_deaths <dbl>, observed_mortality_rate <dbl>,
## # expected_mortality_rate <dbl>, risk_adjusted_mortality_rate <dbl>,
## # lower_limit_of_confidence_interval <chr>,
## # upper_limit_of_confidence_interval <chr>, comparison_results <chr>,
## # facility_name <chr>, short_description <chr>, description <chr>, …
What is the procedure with the highest observed mortality rate?
cardiac %>%
group_by(procedure) %>%
summarize(avg_mortality = mean(observed_mortality_rate)) %>%
arrange(-avg_mortality)
## # A tibble: 3 × 2
## procedure avg_mortality
## <chr> <dbl>
## 1 CABG 1.52
## 2 All PCI 1.22
## 3 Non-Emergency PCI 0.720
cardiac %>%
group_by(procedure) %>%
summarize(avg_mortality = mean(observed_mortality_rate)) %>%
ggplot(aes(x = reorder(procedure, avg_mortality), y = avg_mortality))+
geom_col(color = "black", fill = "lightblue")+
coord_flip()+
labs(title = "Average Observed Mortality Rate by Procedure",
x = "Procedure",
y = "Mortality Rate")+
theme_light(base_size = 12)
How has the mortality rate changed for cardiac surgery over time in each region?
#I'll come back to this later if I figure out the year stuff but feel free to take a crack at it :)
cardiac %>%
group_by(year_of_hospital_discharge) %>%
summarize(observed_mortality_rate=mean(observed_mortality_rate)) %>%
ggplot(aes(x =year_of_hospital_discharge, y = observed_mortality_rate))+
geom_col(color = "purple4", na.rm = T) + geom_smooth(method = lm, se= TRUE) +
labs(title = "Regional Cardiac Surgery Morality Rates",
x = "Year of Hospital Discharge",
y = "Mean Observed Mortality Rate")
## `geom_smooth()` using formula = 'y ~ x'
cardiac %>%
filter(hospital_name == "albany_med._ctr") %>%
group_by(year_of_hospital_discharge, hospital_name) %>%
summarize(avg_mortality = mean(observed_mortality_rate), .groups = 'keep') %>%
ggplot(aes(x = as.factor(year_of_hospital_discharge), y = avg_mortality, group = hospital_name))+
geom_line()+
geom_point()
How does the expected mortality rate compare to the observed mortality rate?
#I don't know which graph to plot for this one but if you can figure it out that would be great. I was thinking maybe scatter but I don't know for sure
#Expected mortality vs observed mortality
cardiac %>%
ggplot(aes(x = expected_mortality_rate, y = observed_mortality_rate))+
geom_point(color = "steelblue") + geom_smooth(method = lm, se= TRUE) +
labs(title = "Expected vs Observed Mortality Rate",
x = "Expected Mortality",
y = "Observed Mortality")
## `geom_smooth()` using formula = 'y ~ x'
Which region has the highest mortality rate?
cardiac %>%
group_by(region) %>%
summarize(avg_mortality = mean(observed_mortality_rate)) %>%
arrange(-avg_mortality)
## # A tibble: 8 × 2
## region avg_mortality
## <chr> <dbl>
## 1 Central NY 1.41
## 2 Western NY - Rochester 1.26
## 3 NY Metro - New Rochelle 1.12
## 4 Capital District 1.10
## 5 NY Metro - NYC 1.09
## 6 New York State 1.07
## 7 NY Metro - Long Island 0.935
## 8 Western NY - Buffalo 0.911
cardiac %>%
group_by(region) %>%
summarize(avg_mortality = mean(observed_mortality_rate)) %>%
ggplot(aes(x = reorder(region, avg_mortality), y = avg_mortality))+
geom_col(color = "black", fill = "lightpink")+
coord_flip()+
labs(title = "Average Observed Mortality Rate per Region",
x = "Region",
y = "Mortality Rate")+
theme_light(base_size = 12)
COMPARING NUMBER OF TOTAL PROCEDURES WITH AVERAGE MORTALITY RATE
total_cases <- cardiac %>%
group_by(facility_id) %>%
summarize(total_procedures = sum(number_of_cases), avg_mortality = mean(observed_mortality_rate)) %>%
ggplot(aes(x = total_procedures, y = avg_mortality))+
geom_point(color = "olivedrab") +
labs(title = "Average Mortality Rate compared with Procedure Numbers ",
x = "Total Number of Procedure",
y = "Mortality Rate")+
theme(axis.text.y = element_text(angle = 0, hjust = 1, size = 5))+
scale_x_log10()+geom_smooth(method=lm, se=F)
total_cases
## `geom_smooth()` using formula = 'y ~ x'
Map:
#Overall deaths for all years:
#All hospital map
ggmap(maptotal) +
geom_point(data = x, aes(facility_longitude, facility_latitude)) + labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
#For number of cases
ggmap(maptotal) +
geom_point(data = x, aes(facility_longitude, facility_latitude, colour = number_of_cases)) + #WE just add the chart in without using ggplot!
labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
#For mortality rate
ggmap(maptotal) +
geom_point(data = x, aes(facility_longitude, facility_latitude, colour = average_moratality_rate)) + #WE just add the chart in without using ggplot!
labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
#For total deaths
ggmap(maptotal) +
geom_point(data = x, aes(facility_longitude, facility_latitude, colour = total_deaths)) + #WE just add the chart in without using ggplot!
labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
#NYC Rate of mortaility
ggmap(mapnyc) +
geom_point(data = x, aes(facility_longitude, facility_latitude, colour = average_moratality_rate)) +
labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") #colour??
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).
#NYC Total Cases
ggmap(mapnyc) +
geom_point(data = x, aes(facility_longitude, facility_latitude, colour = number_of_cases)) +
labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") #colour??
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).
#NYC total deaths
ggmap(mapnyc) +
geom_point(data = x, aes(facility_longitude, facility_latitude, colour = total_deaths)) +
labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") #colour??
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).
library(shinydashboard)
##
## Attaching package: 'shinydashboard'
## The following object is masked from 'package:graphics':
##
## box
ui <- dashboardPage(skin = "purple",
dashboardHeader(title = "Cardiac Dashboard"),
## Sidebar content
dashboardSidebar(
sidebarMenu(
menuItem("Mortality Rate",
tabName = "dashboard",
icon = icon("skull")),
menuItem("Procedures",
tabName = "widgets",
icon = icon("stethoscope")),
menuItem("Cases",
tabName = "cases",
icon = icon("folder-open")),
menuItem("Maps",
tabName = "map",
icon = icon("earth-americas")),
menuItem("NYC Maps",
tabName = "nyc",
icon = icon("hotel"))
)
),
## Body content
dashboardBody(
tabItems(
## First tab content
tabItem(tabName = "dashboard",
h1(strong("These are plots produced based on the mortality rate observed in each New York hospital based on cardiac surgery results."), style = "font-size:20px"),
fluidRow(
box(plotOutput("plot1a", height = 250)),
box(title = "Controls",
selectInput("region",
"Select Region of Interest:",
choices=unique(cardiac$region))
)
),
fluidRow(
box(plotOutput("plot1b", height = 250)),
box(title = "Controls",
selectInput("year",
"Select Year of Interest:",
choices = unique(cardiac$year_of_hospital_discharge)))
),
fluidRow(
box(plotOutput("plot1c", height = 250)),
box(title = "Controls",
selectInput("hospital",
"Select Hospital of Interest:",
choices = unique(cardiac$hospital_name)))
)
),
## Second tab item
tabItem(tabName = "widgets",
h1(strong("Number of patients broken down by procedure type or region in New York Hospitals"), style = "font-size:20px"),
fluidRow(
box(plotOutput("plot2", height = 250)), # box is a container for the plot
box(title = "Controls", # box is a container for the controls
radioButtons("x",
"Select Fill Variable",
choices=c("region", "procedure"),
selected="region")
)
)
),
## Third tab item
tabItem(tabName = "cases",
h1(strong("These are plots produced based on the number of cases of cardiac surgery performed in New York Hospitals."), style = "font-size:20px"),
fluidRow(
box(plotOutput("plot3a", height = 250)), # box is a container for the plot
box(title = "Controls", # box is a container for the controls
selectInput("a",
"Select Hospital",
choices=c(unique(cardiac$hospital_name))
)
)
),
fluidRow(
box(plotOutput("plot3b", height = 250)), # box is a container for the plot
box(title = "Controls", # box is a container for the controls
selectInput("b",
"Select Year:",
choices=c(unique(cardiac$year_of_hospital_discharge))
)
)
)
),
tabItem(tabName = "map",
h1(strong("These are overall maps addressing different scenarios based on our dataset."), style = "font-size:20px"),
fluidRow(
box(plotOutput("plot4a", height = 450)),
box(plotOutput("plot4b", height = 450)),
box(plotOutput("plot4c", height = 450)),
box(plotOutput("plot4d", height = 450))
)
),
tabItem(tabName = "nyc",
h1(strong("These are maps zoomed in on New York City addressing different scenarios based on our dataset."), style = "font-size:20px"),
fluidRow(
box(plotOutput("plot5a", height = 450)),
),
fluidRow(
box(plotOutput("plot5b", height = 450))
),
fluidRow(
box(plotOutput("plot5c", height = 450))
),
)
)
)
)
server <- function(input, output, session) {
session$onSessionEnded(stopApp)
output$plot1a <- renderPlot({
cardiac %>%
filter(region == input$region) %>%
ggplot(aes(x = observed_mortality_rate))+
geom_density(color = "black", fill = "steelblue", alpha = 0.6)+
labs(title = "Observed Mortality Rate Distribution by Region",
x = "Mortality Rate",
y = "Proportion of NY Regions")
})
output$plot1b <- renderPlot({
cardiac %>%
filter(year_of_hospital_discharge == input$year) %>%
ggplot(aes(x = observed_mortality_rate))+
geom_density(color = "black", fill = "steelblue", alpha = 0.6)+
labs(title = "Observed Mortality Rate Distribution by Year",
x = "Mortality Rate",
y = "Proportion of Hospitals")
})
output$plot1c <- renderPlot({
cardiac %>%
filter(hospital_name == input$hospital) %>%
group_by(year_of_hospital_discharge, hospital_name) %>%
summarize(avg_mortality = mean(observed_mortality_rate), .groups = 'keep') %>%
ggplot(aes(x = as.factor(year_of_hospital_discharge), y = avg_mortality, group = hospital_name))+
geom_line()+
geom_point()+
labs(title = "Mortality Rate Over Time",
x = "Year",
y = "Mortality Rate")
})
output$plot2 <- renderPlot({
cardiac %>%
ggplot(aes_string(x="region", fill=input$x))+
geom_bar(position="dodge", alpha=0.8, color="black")+
labs(x=NULL, y=NULL, fill="Fill Variable")+theme(axis.text.x = element_text(angle=90,hjust = 1))
})
output$plot3a <- renderPlot({
cardiac %>%
filter(hospital_name == input$a) %>%
group_by(year_of_hospital_discharge) %>%
summarize(total_cases_a = sum(number_of_cases)) %>%
ggplot(aes_string(x="year_of_hospital_discharge", y = "total_cases_a"))+
geom_col(alpha=0.8, color="black", fill = "steelblue")+
labs(title = "Number of Cases per Year",
x= "Year",
y= "Number of Cases")+
theme_light(base_size = 14)
})
output$plot3b <- renderPlot({
cardiac %>%
filter(year_of_hospital_discharge == input$b) %>%
group_by(facility_id, procedure) %>%
summarize(total_procedures = sum(number_of_cases), avg_mortality = mean(observed_mortality_rate)) %>%
ggplot(aes(x = total_procedures, y = avg_mortality))+
geom_point(color = "olivedrab") +
labs(title = "Average Mortality Rate compared with Procedure Numbers ",
x = "Total Number of Procedures",
y = "Mortality Rate")+
theme(axis.text.y = element_text(angle = 0, hjust = 1, size = 5))+
theme_classic()+
scale_x_log10()+
geom_smooth(method=lm, se=F)
})
output$plot4a <- renderPlot({
ggmap(maptotal) +
geom_point(data = x, aes(facility_longitude, facility_latitude, size = 2)) + labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
})
output$plot4b <- renderPlot({
ggmap(maptotal) +
geom_point(data = x, aes(facility_longitude, facility_latitude, colour = number_of_cases, size = 2)) + #WE just add the chart in without using ggplot!
labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
})
output$plot4c <- renderPlot({
ggmap(maptotal) +
geom_point(data = x, aes(facility_longitude, facility_latitude, colour = average_moratality_rate, size = 2)) + #WE just add the chart in without using ggplot!
labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
})
output$plot4d <- renderPlot({
ggmap(maptotal) +
geom_point(data = x, aes(facility_longitude, facility_latitude, colour = total_deaths, size = 2)) + #WE just add the chart in without using ggplot!
labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
})
output$plot5a <- renderPlot({
ggmap(mapnyc) +
geom_point(data = x, aes(facility_longitude, facility_latitude, colour = average_moratality_rate, size = 2.5)) +
labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
})
output$plot5b <- renderPlot({
ggmap(mapnyc) +
geom_point(data = x, aes(facility_longitude, facility_latitude, colour = number_of_cases, size = 2.5)) +
labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
})
output$plot5c <- renderPlot({
ggmap(mapnyc) +
geom_point(data = x, aes(facility_longitude, facility_latitude, colour = total_deaths, size = 2.5)) +
labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
})
}
shinyApp(ui, server)